package com.chance.cc.crawler.prod.command.job.domain.news.kr36;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.CrawlerJob;
import com.chance.cc.crawler.core.downloader.HttpPage;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.meta.core.bean.CrawlerMetaConstant;
import com.chance.cc.crawler.meta.core.bean.job.CrawlerScheduleJob;
import com.chance.cc.crawler.prod.command.job.domain.news.NewsCommonScript;
import org.apache.commons.lang3.StringUtils;

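/**
 * Publishes the hourly realtime article-crawl schedule job for the 36kr news
 * ("information") section. Running {@link #main(String[])} builds the
 * {@link CrawlerJob} definition and pushes it to the crawler meta service.
 */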
public class Kr36ArticleRealtimeCrawlerScheduleJob extends NewsCommonScript {

    /** Source site identifier; used in queue names, trigger names, and record tags. */
    public static final String domain = "36kr";

    /** Crawl cadence level for this job. */
    private static final String crawler_level = "realtime";

    /** Business sub-type tag attached to each request record. */
    private static final String siteBiz = "realtime";

    /** Site section being crawled. */
    private static final String site = "information";

    public static void main(String[] args) {
        publishCrawlerScheduleJobInfo();
    }

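    /**
     * Builds the crawler job, wraps it in a {@link CrawlerScheduleJob}, and
     * publishes it through the meta service command.
     */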
    private static CrawlerJob publishCrawlerScheduleJobInfo() {
        CrawlerJob crawlerJob = crawlerScheduler();

        // Publish the scheduled job
        CrawlerScheduleJob crawlerScheduleJob = new CrawlerScheduleJob();
        crawlerScheduleJob.setDomain(domain);
        crawlerScheduleJob.setCrawlerJob(JSON.toJSONString(crawlerJob));
        crawlerScheduleJob.setJobType(CrawlerMetaConstant.ScheduleCrawlerJobType.crawler.enumVal());
        crawlerScheduleJob.setNote("36kr information section - hourly realtime crawl job");
        crawlerScheduleJob.setCrawlerKey(crawlerJob.generateCrawlerKey());
        HttpPage httpPage = metaServiceCommand.addOrUpdateCrawlerScheduleJob(crawlerScheduleJob);
        System.out.println("Published job: " + httpPage.getRawText());
        return crawlerJob;
    }

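    /**
     * Assembles the {@link CrawlerJob}: cron-style trigger, Redis request queue,
     * Kafka result pipeline, worker thread count, and the start request record.
     */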
    public static CrawlerJob crawlerScheduler() {
        // Start request for article collection
        CrawlerRequestRecord articleCrawler = doSearchNewsCrawler();

        CrawlerJob crawlerJob = CrawlerJob.builder()
                .triggerInfo(domain,
                        CrawlerMetaConstant.ScheduleJobTrigger_Cron,
                        System.currentTimeMillis(),
                        StringUtils.joinWith("-", site, crawler_level, CrawlerMetaConstant.ScheduleJobTriggerJob_Realtime))
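                // Pending requests are queued in Redis under a name joined from domain, site, biz, and cadence level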
                .crawlerRequestQueue(CrawlerMetaConstant.redisRequestQueue(StringUtils.joinWith("-", "crawler", domain, site, siteBiz, crawler_level, "queue")))
                .kafkaResultPipeline(null, kafkaTopic, null)
                .crawlerJobThreadNumber(10) // worker thread count
                .requestRecord(articleCrawler)
                .build();

        return crawlerJob;
    }

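    /**
     * Builds the start request record: a turn-page crawl rooted at the 36kr
     * homepage, filtered by publication date and tagged with domain, site, and
     * site-biz for downstream routing.
     */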
    private static CrawlerRequestRecord doSearchNewsCrawler() {
        CrawlerRequestRecord requestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(domain, CrawlerEnum.CrawlerRequestType.turnPage) // turn-page (pagination) crawl
                .domain(domain)
                .httpUrl("https://36kr.com/")
                .recordKey("https://36kr.com/")
                .releaseTime(System.currentTimeMillis())
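                // Date-range filter; the (1, null) arguments presumably bound the window (e.g. the last day)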
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(1, null))
                .build();

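        // Tag the record so downstream consumers can route results by domain, site, and business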
        requestRecord.tagsCreator().bizTags().addDomain(domain);
        requestRecord.tagsCreator().bizTags().addSite(site);
        requestRecord.tagsCreator().bizTags().addSiteBiz(siteBiz);
        return requestRecord;
    }
}
